System information
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(gtsummary)
library(survival)
library(survminer)## Loading required package: ggpubr
os <- sessionInfo()$running
if (str_detect(os, "Ubuntu")) {
path <- '~/Biostatistics_M215_Project/data/'
} else if (str_detect(os, "mac")) {
path <- '~/Downloads/Biostat 215 Project/Biostatistics_M215_Project/Data/'
} else if(str_detect(os, "Windows")){
path <- "C:/Users/alexc/Desktop/215/Biostats-M215/"
}Data clean and recode
endpoint = read_excel(paste0(path, 'endpoints.xls'))
endpoint_colnames = c('ID', 'Intervention Group',
'Vitality Status as of 6/1/2006',
'Breast Cancer Status as of 6/1/2006 or last prior contact',
'Other Cancer (invasive, not breast) Status as of 6/1/2006',
'Breast Cancer Contribute to Death',
'Year Breast Cancer Diagnosed', 'Cancer Grade',
'Dummy Variable for Cancer Grade 2',
'Dummy Variable for Cancer Grade 3',
'Dummy Variable for Unknown Cancer Grade',
'Cancer Stage, AJCC 6th',
'Dummy Variable for Stage 2 AJCC 6th',
'Dummy Variable for Stage 3 AJCC 6th',
'WHEL Clinical Site', 'Recurrence Flag',
'Years from Diagnosis to WHEL Study Entry',
'Years from Study Entry to Recurrence/New Primary, or to Censor',
'Years from Diagnosis to Recurrence/New Primary, or to Censor',
'Years from Diagnosis to Death or Censor')
colnames(endpoint) = endpoint_colnames
endpoint$`Intervention Group` = ifelse(endpoint$`Intervention Group` == 3, 'Intervention', 'Comparison')
endpoint$`Vitality Status as of 6/1/2006` = ifelse(endpoint$`Vitality Status as of 6/1/2006` == 0, 'Dead', ifelse(endpoint$`Vitality Status as of 6/1/2006` == 1, 'Alive', 'Unknown'))
for (i in 1:nrow(endpoint)){
if (endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 0){
endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'No Evidence of Recurrence'
} else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 1){
endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – New Primary Breast Cancer'
}else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 2){
endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Local Recurrence'
}else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 3){
endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Regional Recurrence'
}else{
endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Distant Recurrence '}
}
endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` = ifelse(endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` == 0, 'No evidence of Disease', ifelse(endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` == 1, 'Reported Other Cancer (not confirmed)', 'Confirmed Other Cancer'))
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == -1] = 'Not Dead'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 0] = 'Dead from a cause other than Breast Cancer'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 1] = 'Dead from Breast Cancer'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 2] = 'Dead from Cancer, not confirmed breast but likely so'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 0] = 'Grade Not Applicable or Not Available'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 1] = 'Grade I, Well Differentiated'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 2] = 'Grade II, Moderately Differentiated'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 3] = 'Grade III, Poorly Differentiated'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 1] = 'Stage I'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 2] = 'Stage IIA'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 3] = 'Stage IIB'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 4] = 'Stage IIIA'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 5] = 'Stage IIIB'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 1] = 'Site A in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 3] = 'Site B in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 5] = 'Site C in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 7] = 'Site in Arizona'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 9] = 'Site D in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 11] = 'Site in Texas'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 13] = 'Site in Oregon'
endpoint$`Recurrence Flag`[endpoint$`Recurrence Flag` == 0] = 'No Invasive Breast Cancer Events'
endpoint$`Recurrence Flag`[endpoint$`Recurrence Flag` == 1] = 'Invasive Breast Cancer Event'endpoint %>%
tbl_summary(by = 'Intervention Group',
include=-c(ID, `Year Breast Cancer Diagnosed`,
`Dummy Variable for Cancer Grade 2`,
`Dummy Variable for Cancer Grade 3`,
`Dummy Variable for Unknown Cancer Grade`,
`Dummy Variable for Stage 2 AJCC 6th`,
`Dummy Variable for Stage 3 AJCC 6th`),
missing_text = "Missing") %>%
add_p()| Characteristic | Comparison, N = 1,5511 | Intervention, N = 1,5371 | p-value2 |
|---|---|---|---|
| Vitality Status as of 6/1/2006 | 0.8 | ||
| Alive | 1,391 (90%) | 1,381 (90%) | |
| Dead | 160 (10%) | 155 (10%) | |
| Unknown | 0 (0%) | 1 (<0.1%) | |
| Breast Cancer Status as of 6/1/2006 or last prior contact | 0.6 | ||
| Confirmed – Distant Recurrence | 189 (12%) | 168 (11%) | |
| Confirmed – Local Recurrence | 28 (1.8%) | 35 (2.3%) | |
| Confirmed – New Primary Breast Cancer | 35 (2.3%) | 43 (2.8%) | |
| Confirmed – Regional Recurrence | 10 (0.6%) | 10 (0.7%) | |
| No Evidence of Recurrence | 1,289 (83%) | 1,281 (83%) | |
| Other Cancer (invasive, not breast) Status as of 6/1/2006 | 0.5 | ||
| Confirmed Other Cancer | 60 (3.9%) | 58 (3.8%) | |
| No evidence of Disease | 1,483 (96%) | 1,472 (96%) | |
| Reported Other Cancer (not confirmed) | 4 (0.3%) | 1 (<0.1%) | |
| Missing | 4 | 6 | |
| Breast Cancer Contribute to Death | 0.8 | ||
| Dead from a cause other than Breast Cancer | 25 (1.6%) | 28 (1.8%) | |
| Dead from Breast Cancer | 135 (8.7%) | 126 (8.2%) | |
| Dead from Cancer, not confirmed breast but likely so | 0 (0%) | 1 (<0.1%) | |
| Not Dead | 1,391 (90%) | 1,382 (90%) | |
| Cancer Grade | >0.9 | ||
| Grade I, Well Differentiated | 245 (16%) | 239 (16%) | |
| Grade II, Moderately Differentiated | 620 (40%) | 620 (40%) | |
| Grade III, Poorly Differentiated | 557 (36%) | 551 (36%) | |
| Grade Not Applicable or Not Available | 129 (8.3%) | 127 (8.3%) | |
| Cancer Stage, AJCC 6th | >0.9 | ||
| Stage I | 607 (39%) | 584 (38%) | |
| Stage IIA | 511 (33%) | 515 (34%) | |
| Stage IIB | 190 (12%) | 194 (13%) | |
| Stage IIIA | 185 (12%) | 188 (12%) | |
| Stage IIIB | 58 (3.7%) | 56 (3.6%) | |
| WHEL Clinical Site | >0.9 | ||
| Site A in California | 258 (17%) | 272 (18%) | |
| Site B in California | 226 (15%) | 210 (14%) | |
| Site C in California | 267 (17%) | 257 (17%) | |
| Site D in California | 260 (17%) | 256 (17%) | |
| Site in Arizona | 242 (16%) | 233 (15%) | |
| Site in Oregon | 117 (7.5%) | 116 (7.5%) | |
| Site in Texas | 181 (12%) | 193 (13%) | |
| Recurrence Flag | 0.9 | ||
| Invasive Breast Cancer Event | 262 (17%) | 256 (17%) | |
| No Invasive Breast Cancer Events | 1,289 (83%) | 1,281 (83%) | |
| Years from Diagnosis to WHEL Study Entry | 1.76 (1.05, 2.81) | 1.84 (1.04, 2.80) | 0.8 |
| Years from Study Entry to Recurrence/New Primary, or to Censor | 7.12 (5.87, 8.44) | 7.11 (5.83, 8.53) | >0.9 |
| Years from Diagnosis to Recurrence/New Primary, or to Censor | 8.97 (7.36, 10.64) | 9.00 (7.35, 10.67) | 0.8 |
| Years from Diagnosis to Death or Censor | 9.18 (7.76, 10.87) | 9.28 (7.82, 10.89) | 0.6 |
|
1
n (%); Median (IQR)
2
Fisher's exact test; Pearson's Chi-squared test; Wilcoxon rank sum test
|
|||
demo = read_excel("../Data/demographics.xls")
phbase = read_excel("../Data/phbase.xls")
nds = read_excel("../Data/ndsfoody4.xls")
medical = read_excel("../Data/Medical.xls")
# We need the following variables:
## Survival time
SurvTime = as.numeric(endpoint$`Years from Diagnosis to Death or Censor`)
## Group and Status
Group = as_factor(endpoint$`Intervention Group`)
Group = relevel(Group, ref = "Comparison")
a = as_factor(endpoint$`Vitality Status as of 6/1/2006`)
Status = ifelse(a == "Alive", 0, 1)
## Age at randomization, y
AgeIdx = ifelse(demo$`age at rand` < 55, "<55", ">=55") # Age indicator (<=55 or not)
## Cancer stage at diagnosis
a = endpoint$`Cancer Stage, AJCC 6th`
CancerStage = as_factor(a)
## Hormone receptor status
a = medical$`Estr Recep`
b = medical$`Prog Recep`
HormoneRecep = ifelse(a==1 & b==1, "ER+/PR+",
ifelse(a==1 & b==0, "ER+/PR-",
ifelse(a==0 & b==1, "ER-/PR+",
ifelse(a==0 & b==0, "ER-/PR-", NA))))
## Time from Diag to Rand
a = endpoint$`Years from Diagnosis to WHEL Study Entry`
#TimeDiagRand = as.numeric(ifelse(a <=1, 0,
# ifelse(a <=2, 1,
# ifelse(a <=3, 2, 3
# )))) # Time from diagnosis to randomization
TimeDiagRand = a
## Tumor differentiation
a = endpoint$`Cancer Grade`
TumorDiff = as_factor(a)
## No. of positive nodes (Number axillary lymph nodes positive)
a = medical$`Node Pos`
PosNodes = as_factor(ifelse(a==0, "0",
ifelse(a <= 3, "1~3",
ifelse(a <= 6, "4~6", ">6"))))
PosNodes = relevel(PosNodes, ref = "0")
## Tumor size
a = medical$`Tumor Size`
TumorSize = as_factor(ifelse(a < 2, "0~2",
ifelse(a < 3, "2~3",
ifelse(a < 4, "3~4",
ifelse(a < 5, "4~5", ">5")))))
TumorSize = relevel(TumorSize, ref="0~2")
## Physical activity
a = phbase$`NEW METS`
PhysicalAct = ifelse(a <= 210, "<210",
ifelse(a <= 615, "211~615",
ifelse(a <= 1290, "616~1290", ">1290")))
## Energy intake
b = matrix(NA, nrow = length(a), ncol=1)
colnames(b) = "KCal"
b[endpoint$ID %in% nds$ID] = nds$Kcal
KCal = as_factor(ifelse(b <= 1430, "<1430",
ifelse(b <= 1680, "1430~1680",
ifelse(b <= 1980, "1681~1980",
ifelse(b > 1980, ">1980", NA)))))
KCal = relevel(KCal, ref="<1430")
##### PUT THEM TOGETHER #####
AllCauseMortalityData = data.frame(
SurvTime, Group, Status, AgeIdx, CancerStage, HormoneRecep, TimeDiagRand,
TumorDiff, PosNodes, TumorSize, PhysicalAct, KCal
)
library(DT)
AllCauseMortalityData %>%
mutate(SurvTime = round(SurvTime, 3),
Status = ifelse(Status==0, "Alive", "Dead")) %>%
datatable(., extensions = 'Scroller', options = list(
deferRender = TRUE,
scrollY = 200,
scroller = TRUE
))#############################library(miceRanger)
ImputedAll <- AllCauseMortalityData %>%
miceRanger(
m = 3,
returnModels = TRUE,
verbose = TRUE
)## One or more of the specified variables to impute contains no missing values. These will remain as a predictor, however they will not be imputed.
## Converting characters to factors.
##
## Process started at 2021-11-19 23:46:12
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
##
## dataset 1
## iteration 1 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5 | HormoneRecep | PosNodes | TumorSize | KCal
##
## dataset 2
## iteration 1 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5 | HormoneRecep | PosNodes | TumorSize | KCal
##
## dataset 3
## iteration 1 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4 | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5 | HormoneRecep | PosNodes | TumorSize | KCal
ImputedData = as.data.frame(completeData(ImputedAll)[[3]])#plotModelError(ImputedAll, vars = 'allNumeric')
#plotDistributions(ImputedAll, vars = 'allNumeric')
plotVarImportance(ImputedAll)#coxph(Surv(SurvTime, Status) ~ ., data=ImputedData)
library(sjPlot)
library(sjmisc)##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
library(sjlabelled)##
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:dplyr':
##
## as_label
## The following object is masked from 'package:ggplot2':
##
## as_label
# Standard table
UniversalMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData)
tab_model(UniversalMod)## Argument 'df_method' is deprecated. Please use 'ci_method' instead.
| Surv(SurvTime, Status) | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Group [Intervention] | 0.96 | 0.77 – 1.20 | 0.703 |
| AgeIdx [>=55] | 1.61 | 1.28 – 2.03 | <0.001 |
| CancerStage [Stage I] | 0.81 | 0.52 – 1.26 | 0.350 |
| CancerStage [Stage IIB] | 1.25 | 0.79 – 1.97 | 0.349 |
| CancerStage [Stage IIIA] | 0.74 | 0.26 – 2.05 | 0.559 |
| CancerStage [Stage IIIB] | 0.88 | 0.27 – 2.84 | 0.834 |
| HormoneRecep [ER-/PR+] | 1.25 | 0.76 – 2.07 | 0.381 |
| HormoneRecep [ER+/PR-] | 0.95 | 0.66 – 1.39 | 0.809 |
| HormoneRecep [ER+/PR+] | 0.77 | 0.58 – 1.03 | 0.083 |
| TimeDiagRand | 0.63 | 0.56 – 0.71 | <0.001 |
|
TumorDiff [Grade II, Moderately Differentiated] |
1.23 | 0.79 – 1.91 | 0.371 |
|
TumorDiff [Grade I, Well Differentiated] |
0.57 | 0.31 – 1.04 | 0.068 |
|
TumorDiff [Grade III, Poorly Differentiated] |
1.36 | 0.87 – 2.13 | 0.177 |
| PosNodes [1~3] | 1.15 | 0.73 – 1.79 | 0.552 |
| PosNodes [4~6] | 2.73 | 0.92 – 8.14 | 0.071 |
| PosNodes [>6] | 4.66 | 1.53 – 14.16 | 0.007 |
| TumorSize [2~3] | 1.65 | 1.16 – 2.35 | 0.006 |
| TumorSize [3~4] | 1.43 | 0.90 – 2.27 | 0.127 |
| TumorSize [4~5] | 1.45 | 0.85 – 2.47 | 0.169 |
| TumorSize [>5] | 1.82 | 1.09 – 3.06 | 0.023 |
| PhysicalAct [>1290] | 0.65 | 0.46 – 0.91 | 0.011 |
| PhysicalAct [211~615] | 1.03 | 0.78 – 1.37 | 0.826 |
| PhysicalAct [616~1290] | 0.84 | 0.62 – 1.14 | 0.265 |
| KCal [1430~1680] | 1.04 | 0.78 – 1.38 | 0.789 |
| KCal [>1980] | 1.35 | 0.97 – 1.86 | 0.073 |
| KCal [1681~1980] | 0.84 | 0.61 – 1.15 | 0.281 |
| Observations | 3088 | ||
| R2 Nagelkerke | 0.095 | ||
# Model Diagnosis
## Martingale Residual
MartResid = residuals(UniversalMod, type="martingale")
ggplot(data = ImputedData, mapping = aes(x = CancerStage, y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Martingale Residual Plot") +
theme_bw() + theme(legend.key = element_blank())ggplot(data = ImputedData, mapping = aes(x = KCal, y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Martingale Residual Plot") +
theme_bw() + theme(legend.key = element_blank())ggplot(data = ImputedData, mapping = aes(x = TimeDiagRand, y = MartResid)) +
geom_point() +
geom_smooth() +
labs(title = "Time from Diagnosis to Randomization") +
theme_bw() + theme(legend.key = element_blank())## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Cox-Snell Residual
Event = ImputedData$Status
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
ties = c("efron","breslow","exact")[1])
## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
geom_point() +
labs(x = "Cox-Snell residuals as pseudo observed times",
y = "Estimated cumulative hazard at pseudo observed times") +
theme_bw() + theme(legend.key = element_blank()) +
geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5)We have already fitted a universal model, now we are going to fit models for both groups.
library(survival)
library(finalfit)##
## Attaching package: 'finalfit'
## The following object is masked from 'package:sjlabelled':
##
## remove_labels
dependent_os <- "Surv(SurvTime, Status)"
explanatory <- c("AgeIdx", "CancerStage", "HormoneRecep", "TimeDiagRand",
"TumorDiff", "PosNodes", "TumorSize", "PhysicalAct", "KCal")
ImputedData %>%
summary_factorlist(dependent_os,
explanatory, fit_id=TRUE) -> TabStart## Dependent variable is a survival object
TabStart## label levels all
## AgeIdx <55 1825 (59.1)
## >=55 1263 (40.9)
## CancerStage Stage IIA 1026 (33.2)
## Stage I 1191 (38.6)
## Stage IIB 384 (12.4)
## Stage IIIA 373 (12.1)
## Stage IIIB 114 (3.7)
## HormoneRecep ER-/PR- 631 (20.4)
## ER-/PR+ 132 (4.3)
## ER+/PR- 374 (12.1)
## ER+/PR+ 1951 (63.2)
## TimeDiagRand Mean (SD) 2.0 (1.0)
## TumorDiff Grade Not Applicable or Not Available 256 (8.3)
## Grade II, Moderately Differentiated 1240 (40.2)
## Grade I, Well Differentiated 484 (15.7)
## Grade III, Poorly Differentiated 1108 (35.9)
## PosNodes 0 1775 (57.5)
## 1~3 885 (28.7)
## 4~6 231 (7.5)
## >6 197 (6.4)
## TumorSize 0~2 1523 (49.3)
## 2~3 863 (27.9)
## 3~4 335 (10.8)
## 4~5 151 (4.9)
## >5 216 (7.0)
## PhysicalAct <210 850 (27.5)
## >1290 738 (23.9)
## 211~615 751 (24.3)
## 616~1290 749 (24.3)
## KCal <1430 1187 (38.4)
## 1430~1680 786 (25.5)
## >1980 430 (13.9)
## 1681~1980 685 (22.2)
## fit_id index
## AgeIdx<55 1
## AgeIdx>=55 2
## CancerStageStage IIA 3
## CancerStageStage I 4
## CancerStageStage IIB 5
## CancerStageStage IIIA 6
## CancerStageStage IIIB 7
## HormoneRecepER-/PR- 8
## HormoneRecepER-/PR+ 9
## HormoneRecepER+/PR- 10
## HormoneRecepER+/PR+ 11
## TimeDiagRand 12
## TumorDiffGrade Not Applicable or Not Available 13
## TumorDiffGrade II, Moderately Differentiated 14
## TumorDiffGrade I, Well Differentiated 15
## TumorDiffGrade III, Poorly Differentiated 16
## PosNodes0 17
## PosNodes1~3 18
## PosNodes4~6 19
## PosNodes>6 20
## TumorSize0~2 21
## TumorSize2~3 22
## TumorSize3~4 23
## TumorSize4~5 24
## TumorSize>5 25
## PhysicalAct<210 26
## PhysicalAct>1290 27
## PhysicalAct211~615 28
## PhysicalAct616~1290 29
## KCal<1430 30
## KCal1430~1680 31
## KCal>1980 32
## KCal1681~1980 33
# Cox model for the intervention group
ImputedData %>%
filter(Group=="Intervention") %>%
finalfit(dependent_os, explanatory) -> TabIntervention
knitr::kable(TabIntervention, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))| Dependent: Surv(SurvTime, Status) | all | HR (univariable) | HR (multivariable) | |
|---|---|---|---|---|
| AgeIdx | <55 | 908 (100.0) | - | - |
| >=55 | 629 (100.0) | 1.16 (0.85-1.59, p=0.348) | 1.63 (1.18-2.27, p=0.004) | |
| CancerStage | Stage IIA | 515 (100.0) | - | - |
| Stage I | 584 (100.0) | 0.69 (0.43-1.09, p=0.110) | 1.25 (0.66-2.36, p=0.493) | |
| Stage IIB | 194 (100.0) | 2.13 (1.34-3.39, p=0.001) | 1.21 (0.64-2.29, p=0.565) | |
| Stage IIIA | 188 (100.0) | 2.51 (1.60-3.93, p<0.001) | 0.27 (0.03-2.23, p=0.225) | |
| Stage IIIB | 56 (100.0) | 3.58 (1.95-6.56, p<0.001) | 0.26 (0.03-2.50, p=0.244) | |
| HormoneRecep | ER-/PR- | 307 (100.0) | - | - |
| ER-/PR+ | 53 (100.0) | 0.97 (0.46-2.05, p=0.930) | 1.03 (0.47-2.23, p=0.942) | |
| ER+/PR- | 200 (100.0) | 0.82 (0.50-1.34, p=0.429) | 0.84 (0.50-1.41, p=0.503) | |
| ER+/PR+ | 977 (100.0) | 0.52 (0.36-0.75, p=0.001) | 0.63 (0.42-0.96, p=0.032) | |
| TimeDiagRand | Mean (SD) | 2.0 (1.0) | 0.69 (0.59-0.82, p<0.001) | 0.65 (0.55-0.77, p<0.001) |
| TumorDiff | Grade Not Applicable or Not Available | 127 (100.0) | - | - |
| Grade II, Moderately Differentiated | 620 (100.0) | 1.49 (0.74-3.01, p=0.263) | 1.49 (0.73-3.05, p=0.270) | |
| Grade I, Well Differentiated | 239 (100.0) | 0.84 (0.36-1.96, p=0.680) | 0.98 (0.41-2.31, p=0.958) | |
| Grade III, Poorly Differentiated | 551 (100.0) | 2.26 (1.13-4.51, p=0.021) | 1.83 (0.89-3.75, p=0.100) | |
| PosNodes | 0 | 879 (100.0) | - | - |
| 1~3 | 437 (100.0) | 1.78 (1.21-2.60, p=0.003) | 1.93 (1.01-3.67, p=0.046) | |
| 4~6 | 116 (100.0) | 2.94 (1.79-4.85, p<0.001) | 8.99 (1.03-78.29, p=0.047) | |
| >6 | 105 (100.0) | 4.50 (2.87-7.07, p<0.001) | 13.94 (1.57-123.50, p=0.018) | |
| TumorSize | 0~2 | 753 (100.0) | - | - |
| 2~3 | 421 (100.0) | 2.61 (1.76-3.87, p<0.001) | 2.34 (1.40-3.90, p=0.001) | |
| 3~4 | 175 (100.0) | 1.82 (1.05-3.15, p=0.033) | 1.31 (0.65-2.66, p=0.447) | |
| 4~5 | 79 (100.0) | 2.99 (1.61-5.56, p=0.001) | 1.93 (0.89-4.18, p=0.096) | |
| >5 | 109 (100.0) | 4.07 (2.45-6.75, p<0.001) | 3.17 (1.51-6.65, p=0.002) | |
| PhysicalAct | <210 | 443 (100.0) | - | - |
| >1290 | 351 (100.0) | 0.61 (0.38-0.99, p=0.047) | 0.67 (0.41-1.09, p=0.103) | |
| 211~615 | 368 (100.0) | 1.08 (0.72-1.61, p=0.717) | 1.13 (0.75-1.70, p=0.551) | |
| 616~1290 | 375 (100.0) | 0.84 (0.55-1.29, p=0.429) | 0.89 (0.57-1.37, p=0.588) | |
| KCal | <1430 | 606 (100.0) | - | - |
| 1430~1680 | 387 (100.0) | 1.00 (0.66-1.49, p=0.981) | 1.03 (0.69-1.56, p=0.876) | |
| >1980 | 210 (100.0) | 1.41 (0.91-2.19, p=0.122) | 1.61 (1.03-2.51, p=0.038) | |
| 1681~1980 | 334 (100.0) | 0.80 (0.51-1.26, p=0.328) | 0.91 (0.57-1.45, p=0.698) |
# Cox model for the comparison group
ImputedData %>%
filter(Group=="Comparison") %>%
finalfit(dependent_os, explanatory) -> TabComparison
knitr::kable(TabIntervention, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))| Dependent: Surv(SurvTime, Status) | all | HR (univariable) | HR (multivariable) | |
|---|---|---|---|---|
| AgeIdx | <55 | 908 (100.0) | - | - |
| >=55 | 629 (100.0) | 1.16 (0.85-1.59, p=0.348) | 1.63 (1.18-2.27, p=0.004) | |
| CancerStage | Stage IIA | 515 (100.0) | - | - |
| Stage I | 584 (100.0) | 0.69 (0.43-1.09, p=0.110) | 1.25 (0.66-2.36, p=0.493) | |
| Stage IIB | 194 (100.0) | 2.13 (1.34-3.39, p=0.001) | 1.21 (0.64-2.29, p=0.565) | |
| Stage IIIA | 188 (100.0) | 2.51 (1.60-3.93, p<0.001) | 0.27 (0.03-2.23, p=0.225) | |
| Stage IIIB | 56 (100.0) | 3.58 (1.95-6.56, p<0.001) | 0.26 (0.03-2.50, p=0.244) | |
| HormoneRecep | ER-/PR- | 307 (100.0) | - | - |
| ER-/PR+ | 53 (100.0) | 0.97 (0.46-2.05, p=0.930) | 1.03 (0.47-2.23, p=0.942) | |
| ER+/PR- | 200 (100.0) | 0.82 (0.50-1.34, p=0.429) | 0.84 (0.50-1.41, p=0.503) | |
| ER+/PR+ | 977 (100.0) | 0.52 (0.36-0.75, p=0.001) | 0.63 (0.42-0.96, p=0.032) | |
| TimeDiagRand | Mean (SD) | 2.0 (1.0) | 0.69 (0.59-0.82, p<0.001) | 0.65 (0.55-0.77, p<0.001) |
| TumorDiff | Grade Not Applicable or Not Available | 127 (100.0) | - | - |
| Grade II, Moderately Differentiated | 620 (100.0) | 1.49 (0.74-3.01, p=0.263) | 1.49 (0.73-3.05, p=0.270) | |
| Grade I, Well Differentiated | 239 (100.0) | 0.84 (0.36-1.96, p=0.680) | 0.98 (0.41-2.31, p=0.958) | |
| Grade III, Poorly Differentiated | 551 (100.0) | 2.26 (1.13-4.51, p=0.021) | 1.83 (0.89-3.75, p=0.100) | |
| PosNodes | 0 | 879 (100.0) | - | - |
| 1~3 | 437 (100.0) | 1.78 (1.21-2.60, p=0.003) | 1.93 (1.01-3.67, p=0.046) | |
| 4~6 | 116 (100.0) | 2.94 (1.79-4.85, p<0.001) | 8.99 (1.03-78.29, p=0.047) | |
| >6 | 105 (100.0) | 4.50 (2.87-7.07, p<0.001) | 13.94 (1.57-123.50, p=0.018) | |
| TumorSize | 0~2 | 753 (100.0) | - | - |
| 2~3 | 421 (100.0) | 2.61 (1.76-3.87, p<0.001) | 2.34 (1.40-3.90, p=0.001) | |
| 3~4 | 175 (100.0) | 1.82 (1.05-3.15, p=0.033) | 1.31 (0.65-2.66, p=0.447) | |
| 4~5 | 79 (100.0) | 2.99 (1.61-5.56, p=0.001) | 1.93 (0.89-4.18, p=0.096) | |
| >5 | 109 (100.0) | 4.07 (2.45-6.75, p<0.001) | 3.17 (1.51-6.65, p=0.002) | |
| PhysicalAct | <210 | 443 (100.0) | - | - |
| >1290 | 351 (100.0) | 0.61 (0.38-0.99, p=0.047) | 0.67 (0.41-1.09, p=0.103) | |
| 211~615 | 368 (100.0) | 1.08 (0.72-1.61, p=0.717) | 1.13 (0.75-1.70, p=0.551) | |
| 616~1290 | 375 (100.0) | 0.84 (0.55-1.29, p=0.429) | 0.89 (0.57-1.37, p=0.588) | |
| KCal | <1430 | 606 (100.0) | - | - |
| 1430~1680 | 387 (100.0) | 1.00 (0.66-1.49, p=0.981) | 1.03 (0.69-1.56, p=0.876) | |
| >1980 | 210 (100.0) | 1.41 (0.91-2.19, p=0.122) | 1.61 (1.03-2.51, p=0.038) | |
| 1681~1980 | 334 (100.0) | 0.80 (0.51-1.26, p=0.328) | 0.91 (0.57-1.45, p=0.698) |
# Intervention
IdxInt = ImputedData$Group=="Intervention"
IntMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData[IdxInt,])
# Martingale Residual
MartResid = residuals(IntMod, type="martingale")
ggplot(data = ImputedData[IdxInt,], mapping = aes(x = CancerStage, y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Martingale Residual Plot for Cancer Stage") +
theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")ggplot(data = ImputedData[IdxInt,], mapping = aes(x = TimeDiagRand, y = MartResid)) +
geom_point() +
geom_smooth() +
labs(title = "Time from Diagnosis to Randomization") +
theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = ImputedData[IdxInt,], mapping = aes(x = TumorDiff, y = MartResid)) +
geom_point() +
geom_violin() +
labs(title = "Tumor Differentiation") +
theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=60,hjust=1)) +
ylab("Martingale Residual")ggplot(data = ImputedData[IdxInt,], mapping = aes(x = KCal, y = MartResid)) +
geom_point() +
geom_violin() +
labs(title = "KCal") +
theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=0,hjust=1)) +
ylab("Martingale Residual")ggplot(data = ImputedData[IdxInt,], mapping = aes(x = factor(TumorSize), y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Tumor Size") +
theme_bw() + theme(legend.key = element_blank()) +
ylab("Martingale Residual") + xlab("Tumor Size")# Schoenfeld Individual Test
test.ph = cox.zph(IntMod)
ggcoxzph(test.ph)## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 156 rows containing missing values (geom_point).
## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).
# Cox-Snell Residual
Event = ImputedData$Status[IdxInt]
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
ties = c("efron","breslow","exact")[1])
## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
geom_point() +
labs(x = "Cox-Snell residuals as pseudo observed times",
y = "Estimated cumulative hazard at pseudo observed times") +
theme_bw() + theme(legend.key = element_blank()) +
geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5) #### (b) Comparison group
# Comparison
IdxCom = ImputedData$Group=="Comparison"
ComMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData[IdxCom,], method = 'breslow')
# Martingale Residual
MartResid = residuals(ComMod, type="martingale")
ggplot(data = ImputedData[IdxCom,], mapping = aes(x = CancerStage, y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Martingale Residual Plot for Cancer Stage") +
theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")ggplot(data = ImputedData[IdxCom,], mapping = aes(x = TimeDiagRand, y = MartResid)) +
geom_point() +
geom_smooth() +
labs(title = "Time from Diagnosis to Randomization") +
theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = ImputedData[IdxCom,], mapping = aes(x = TumorDiff, y = MartResid)) +
geom_point() +
geom_violin() +
labs(title = "Tumor Differentiation") +
theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=60,hjust=1)) +
ylab("Martingale Residual")ggplot(data = ImputedData[IdxCom,], mapping = aes(x = KCal, y = MartResid)) +
geom_point() +
geom_violin() +
labs(title = "KCal") +
theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=0,hjust=1)) +
ylab("Martingale Residual")ggplot(data = ImputedData[IdxCom,], mapping = aes(x = factor(TumorSize), y = MartResid)) +
geom_violin() +
geom_point() +
labs(title = "Tumor Size") +
theme_bw() + theme(legend.key = element_blank()) +
ylab("Martingale Residual") + xlab("Tumor Size")# Schoenfeld Individual Test
test.ph = cox.zph(ComMod)
ggcoxzph(test.ph)## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 160 rows containing missing values (geom_point).
## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).
# Cox-Snell Residual
Event = ImputedData$Status[IdxCom]
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
ties = c("efron","breslow","exact")[1])
## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
geom_point() +
labs(x = "Cox-Snell residuals as pseudo observed times",
y = "Estimated cumulative hazard at pseudo observed times") +
theme_bw() + theme(legend.key = element_blank()) +
geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5) +
labs(title = "Quality of overall fit: Cox-Snell")explanatory = c("AgeIdx", "CancerStage", "HormoneRecep",
"TumorDiff", "PosNodes", "TumorSize", "PhysicalAct", "KCal")
for (item in explanatory) {
len = ImputedData %>%
select(item) %>%
unique(.) %>%
nrow(.)
category = ImputedData %>%
select(item) %>%
unique(.)
hi = item
for (i in 1:len) {
cat = category[i,]
print(paste0("Explanatory: ", hi))
print(paste0("Category: ", cat))
idx = ImputedData[, hi] == cat
print(summary(coxph(Surv(SurvTime, Status) ~ Group, data=ImputedData[idx, ])))
}
}## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(item)` instead of `item` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## [1] "Explanatory: AgeIdx"
## [1] "Category: <55"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1825, number of events= 170
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.003098 1.003103 0.153395 0.02 0.984
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.003 0.9969 0.7426 1.355
##
## Concordance= 0.496 (se = 0.02 )
## Likelihood ratio test= 0 on 1 df, p=1
## Wald test = 0 on 1 df, p=1
## Score (logrank) test = 0 on 1 df, p=1
##
## [1] "Explanatory: AgeIdx"
## [1] "Category: >=55"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1263, number of events= 146
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.04775 0.95338 0.16559 -0.288 0.773
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.9534 1.049 0.6891 1.319
##
## Concordance= 0.5 (se = 0.022 )
## Likelihood ratio test= 0.08 on 1 df, p=0.8
## Wald test = 0.08 on 1 df, p=0.8
## Score (logrank) test = 0.08 on 1 df, p=0.8
##
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIA"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1026, number of events= 91
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.2017 0.8174 0.2107 -0.957 0.339
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8174 1.223 0.5408 1.235
##
## Concordance= 0.527 (se = 0.027 )
## Likelihood ratio test= 0.92 on 1 df, p=0.3
## Wald test = 0.92 on 1 df, p=0.3
## Score (logrank) test = 0.92 on 1 df, p=0.3
##
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage I"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1191, number of events= 65
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.07616 1.07914 0.24811 0.307 0.759
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.079 0.9267 0.6636 1.755
##
## Concordance= 0.516 (se = 0.033 )
## Likelihood ratio test= 0.09 on 1 df, p=0.8
## Wald test = 0.09 on 1 df, p=0.8
## Score (logrank) test = 0.09 on 1 df, p=0.8
##
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIB"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 384, number of events= 52
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.4093 1.5058 0.2857 1.433 0.152
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.506 0.6641 0.8601 2.636
##
## Concordance= 0.553 (se = 0.036 )
## Likelihood ratio test= 2.1 on 1 df, p=0.1
## Wald test = 2.05 on 1 df, p=0.2
## Score (logrank) test = 2.08 on 1 df, p=0.1
##
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIIA"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 373, number of events= 70
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.03257 1.03311 0.23917 0.136 0.892
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.033 0.968 0.6465 1.651
##
## Concordance= 0.505 (se = 0.031 )
## Likelihood ratio test= 0.02 on 1 df, p=0.9
## Wald test = 0.02 on 1 df, p=0.9
## Score (logrank) test = 0.02 on 1 df, p=0.9
##
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIIB"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 114, number of events= 38
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.5660 0.5678 0.3371 -1.679 0.0932 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.5678 1.761 0.2933 1.099
##
## Concordance= 0.567 (se = 0.041 )
## Likelihood ratio test= 2.93 on 1 df, p=0.09
## Wald test = 2.82 on 1 df, p=0.09
## Score (logrank) test = 2.89 on 1 df, p=0.09
##
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER+/PR+"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1951, number of events= 166
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1094 0.8964 0.1554 -0.704 0.482
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8964 1.116 0.661 1.216
##
## Concordance= 0.513 (se = 0.02 )
## Likelihood ratio test= 0.5 on 1 df, p=0.5
## Wald test = 0.5 on 1 df, p=0.5
## Score (logrank) test = 0.5 on 1 df, p=0.5
##
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER-/PR+"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 132, number of events= 19
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.1224 1.1303 0.4653 0.263 0.792
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.13 0.8848 0.454 2.814
##
## Concordance= 0.521 (se = 0.06 )
## Likelihood ratio test= 0.07 on 1 df, p=0.8
## Wald test = 0.07 on 1 df, p=0.8
## Score (logrank) test = 0.07 on 1 df, p=0.8
##
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER-/PR-"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 631, number of events= 86
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.1256 1.1338 0.2157 0.582 0.56
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.134 0.882 0.7429 1.731
##
## Concordance= 0.517 (se = 0.028 )
## Likelihood ratio test= 0.34 on 1 df, p=0.6
## Wald test = 0.34 on 1 df, p=0.6
## Score (logrank) test = 0.34 on 1 df, p=0.6
##
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER+/PR-"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 374, number of events= 45
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.0401 1.0409 0.3002 0.134 0.894
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.041 0.9607 0.578 1.875
##
## Concordance= 0.494 (se = 0.039 )
## Likelihood ratio test= 0.02 on 1 df, p=0.9
## Wald test = 0.02 on 1 df, p=0.9
## Score (logrank) test = 0.02 on 1 df, p=0.9
##
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade Not Applicable or Not Available"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 256, number of events= 24
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.4682 0.6261 0.4219 -1.11 0.267
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.6261 1.597 0.2739 1.431
##
## Concordance= 0.554 (se = 0.051 )
## Likelihood ratio test= 1.27 on 1 df, p=0.3
## Wald test = 1.23 on 1 df, p=0.3
## Score (logrank) test = 1.25 on 1 df, p=0.3
##
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade II, Moderately Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1240, number of events= 123
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.09382 0.91045 0.18049 -0.52 0.603
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.9104 1.098 0.6392 1.297
##
## Concordance= 0.509 (se = 0.024 )
## Likelihood ratio test= 0.27 on 1 df, p=0.6
## Wald test = 0.27 on 1 df, p=0.6
## Score (logrank) test = 0.27 on 1 df, p=0.6
##
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade I, Well Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 484, number of events= 20
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.5902 1.8044 0.4691 1.258 0.208
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.804 0.5542 0.7196 4.525
##
## Concordance= 0.538 (se = 0.06 )
## Likelihood ratio test= 1.66 on 1 df, p=0.2
## Wald test = 1.58 on 1 df, p=0.2
## Score (logrank) test = 1.63 on 1 df, p=0.2
##
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade III, Poorly Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1108, number of events= 149
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.03348 1.03405 0.16386 0.204 0.838
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.034 0.9671 0.75 1.426
##
## Concordance= 0.507 (se = 0.021 )
## Likelihood ratio test= 0.04 on 1 df, p=0.8
## Wald test = 0.04 on 1 df, p=0.8
## Score (logrank) test = 0.04 on 1 df, p=0.8
##
## [1] "Explanatory: PosNodes"
## [1] "Category: 1~3"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 885, number of events= 88
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.2416 1.2733 0.2147 1.125 0.26
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.273 0.7854 0.836 1.939
##
## Concordance= 0.531 (se = 0.028 )
## Likelihood ratio test= 1.28 on 1 df, p=0.3
## Wald test = 1.27 on 1 df, p=0.3
## Score (logrank) test = 1.27 on 1 df, p=0.3
##
## [1] "Explanatory: PosNodes"
## [1] "Category: 0"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1775, number of events= 125
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1203 0.8867 0.1794 -0.671 0.503
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8867 1.128 0.6239 1.26
##
## Concordance= 0.515 (se = 0.023 )
## Likelihood ratio test= 0.45 on 1 df, p=0.5
## Wald test = 0.45 on 1 df, p=0.5
## Score (logrank) test = 0.45 on 1 df, p=0.5
##
## [1] "Explanatory: PosNodes"
## [1] "Category: 4~6"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 231, number of events= 40
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.1119 1.1184 0.3167 0.353 0.724
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.118 0.8941 0.6013 2.081
##
## Concordance= 0.521 (se = 0.041 )
## Likelihood ratio test= 0.13 on 1 df, p=0.7
## Wald test = 0.12 on 1 df, p=0.7
## Score (logrank) test = 0.13 on 1 df, p=0.7
##
## [1] "Explanatory: PosNodes"
## [1] "Category: >6"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 197, number of events= 63
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.4325 0.6489 0.2541 -1.702 0.0888 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.6489 1.541 0.3943 1.068
##
## Concordance= 0.556 (se = 0.032 )
## Likelihood ratio test= 2.92 on 1 df, p=0.09
## Wald test = 2.9 on 1 df, p=0.09
## Score (logrank) test = 2.94 on 1 df, p=0.09
##
## [1] "Explanatory: TumorSize"
## [1] "Category: 0~2"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1523, number of events= 94
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1561 0.8555 0.2070 -0.754 0.451
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8555 1.169 0.5701 1.284
##
## Concordance= 0.509 (se = 0.027 )
## Likelihood ratio test= 0.57 on 1 df, p=0.5
## Wald test = 0.57 on 1 df, p=0.5
## Score (logrank) test = 0.57 on 1 df, p=0.5
##
## [1] "Explanatory: TumorSize"
## [1] "Category: 2~3"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 863, number of events= 109
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.2499 1.2839 0.1923 1.3 0.194
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.284 0.7789 0.8808 1.872
##
## Concordance= 0.53 (se = 0.025 )
## Likelihood ratio test= 1.7 on 1 df, p=0.2
## Wald test = 1.69 on 1 df, p=0.2
## Score (logrank) test = 1.7 on 1 df, p=0.2
##
## [1] "Explanatory: TumorSize"
## [1] "Category: 3~4"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 335, number of events= 46
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.5830 0.5582 0.3022 -1.929 0.0537 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.5582 1.791 0.3087 1.009
##
## Concordance= 0.574 (se = 0.037 )
## Likelihood ratio test= 3.83 on 1 df, p=0.05
## Wald test = 3.72 on 1 df, p=0.05
## Score (logrank) test = 3.83 on 1 df, p=0.05
##
## [1] "Explanatory: TumorSize"
## [1] "Category: 4~5"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 151, number of events= 25
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1039 0.9013 0.4005 -0.259 0.795
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.9013 1.11 0.4111 1.976
##
## Concordance= 0.532 (se = 0.051 )
## Likelihood ratio test= 0.07 on 1 df, p=0.8
## Wald test = 0.07 on 1 df, p=0.8
## Score (logrank) test = 0.07 on 1 df, p=0.8
##
## [1] "Explanatory: TumorSize"
## [1] "Category: >5"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 216, number of events= 42
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.1843 1.2023 0.3101 0.594 0.552
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.202 0.8317 0.6547 2.208
##
## Concordance= 0.517 (se = 0.04 )
## Likelihood ratio test= 0.35 on 1 df, p=0.6
## Wald test = 0.35 on 1 df, p=0.6
## Score (logrank) test = 0.35 on 1 df, p=0.6
##
## [1] "Explanatory: PhysicalAct"
## [1] "Category: <210"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 850, number of events= 103
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1491 0.8615 0.1972 -0.756 0.45
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8615 1.161 0.5853 1.268
##
## Concordance= 0.516 (se = 0.026 )
## Likelihood ratio test= 0.57 on 1 df, p=0.4
## Wald test = 0.57 on 1 df, p=0.4
## Score (logrank) test = 0.57 on 1 df, p=0.4
##
## [1] "Explanatory: PhysicalAct"
## [1] "Category: >1290"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 738, number of events= 52
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.01448 1.01458 0.27767 0.052 0.958
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.015 0.9856 0.5888 1.748
##
## Concordance= 0.512 (se = 0.036 )
## Likelihood ratio test= 0 on 1 df, p=1
## Wald test = 0 on 1 df, p=1
## Score (logrank) test = 0 on 1 df, p=1
##
## [1] "Explanatory: PhysicalAct"
## [1] "Category: 616~1290"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 749, number of events= 71
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.0336 1.0342 0.2374 0.141 0.887
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.034 0.967 0.6493 1.647
##
## Concordance= 0.491 (se = 0.031 )
## Likelihood ratio test= 0.02 on 1 df, p=0.9
## Wald test = 0.02 on 1 df, p=0.9
## Score (logrank) test = 0.02 on 1 df, p=0.9
##
## [1] "Explanatory: PhysicalAct"
## [1] "Category: 211~615"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 751, number of events= 90
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.03249 1.03303 0.21086 0.154 0.878
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.033 0.968 0.6833 1.562
##
## Concordance= 0.506 (se = 0.027 )
## Likelihood ratio test= 0.02 on 1 df, p=0.9
## Wald test = 0.02 on 1 df, p=0.9
## Score (logrank) test = 0.02 on 1 df, p=0.9
##
## [1] "Explanatory: KCal"
## [1] "Category: <1430"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 1187, number of events= 121
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.03023 0.97022 0.18188 -0.166 0.868
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.9702 1.031 0.6793 1.386
##
## Concordance= 0.504 (se = 0.024 )
## Likelihood ratio test= 0.03 on 1 df, p=0.9
## Wald test = 0.03 on 1 df, p=0.9
## Score (logrank) test = 0.03 on 1 df, p=0.9
##
## [1] "Explanatory: KCal"
## [1] "Category: 1430~1680"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 786, number of events= 84
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.1436 0.8662 0.2188 -0.656 0.512
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.8662 1.154 0.5641 1.33
##
## Concordance= 0.517 (se = 0.028 )
## Likelihood ratio test= 0.43 on 1 df, p=0.5
## Wald test = 0.43 on 1 df, p=0.5
## Score (logrank) test = 0.43 on 1 df, p=0.5
##
## [1] "Explanatory: KCal"
## [1] "Category: 1681~1980"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 685, number of events= 57
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention -0.06203 0.93986 0.26528 -0.234 0.815
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 0.9399 1.064 0.5588 1.581
##
## Concordance= 0.507 (se = 0.034 )
## Likelihood ratio test= 0.05 on 1 df, p=0.8
## Wald test = 0.05 on 1 df, p=0.8
## Score (logrank) test = 0.05 on 1 df, p=0.8
##
## [1] "Explanatory: KCal"
## [1] "Category: >1980"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx,
## ])
##
## n= 430, number of events= 54
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GroupIntervention 0.2411 1.2726 0.2742 0.879 0.379
##
## exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention 1.273 0.7858 0.7435 2.178
##
## Concordance= 0.533 (se = 0.035 )
## Likelihood ratio test= 0.78 on 1 df, p=0.4
## Wald test = 0.77 on 1 df, p=0.4
## Score (logrank) test = 0.78 on 1 df, p=0.4
AndersonPlot <- function(mod.group, Adjust="") {
t = ImputedData$SurvTime
reptime <- function(l, t){
x <- numeric(max(t))
for(i in min(t):max(t)){
diff <- i - t
diff <- diff[diff >= 0]
x[i] <- l[which.min(diff)]
}
return(x)
}
H1 <- mod.group$hazard[mod.group$strata == "Comparison"]
H2 <- mod.group$hazard[mod.group$strata == "Intervention"]
t1 <- mod.group$time[mod.group$strata == "Comparison"]
t2 <- mod.group$time[mod.group$strata == "Intervention"]
H1 <- reptime(H1, t1)
H2 <- reptime(H2, t2)
ggplot() +
geom_step(aes(x=H1[1:15], y=H2[1:15])) +
xlab("Breslow's est of Comparison Group") +
ylab("Breslow's est of Intervention Group") +
ggtitle(paste("Anderson's Plot\n ", Adjust) )
}# Adjusting for Cancer stage
mod.group = basehaz(coxph(Surv(SurvTime, Status) ~ strata(factor(Group))+CancerStage, data=ImputedData))
AndersonPlot(mod.group)## Warning: Removed 1 row(s) containing missing values (geom_path).
# Adjusting for Time from diag to rand
mod.group = basehaz(coxph(Surv(SurvTime, Status) ~ strata(factor(Group))+TimeDiagRand, data=ImputedData))
AndersonPlot(mod.group, "Time from Diag to Rand")## Warning: Removed 1 row(s) containing missing values (geom_path).
# Adjusting for Age (<55 or not)
mod.group = basehaz(coxph(Surv(SurvTime, Status) ~ strata(Group)+AgeIdx, data=ImputedData))
AndersonPlot(mod.group, "Age at Diagnosis")## Warning: Removed 1 row(s) containing missing values (geom_path).
# Adjusting for Hormone Receptor Status
mod.group = basehaz(coxph(Surv(SurvTime, Status) ~ strata(Group)+HormoneRecep, data=ImputedData))
AndersonPlot(mod.group, "Hormone Receptor Status")## Warning: Removed 1 row(s) containing missing values (geom_path).
# Adjusting for others
mod.group = basehaz(coxph(Surv(SurvTime, Status) ~ strata(factor(Group))+., data=ImputedData))
AndersonPlot(mod.group, "All covariates")## Warning: Removed 1 row(s) containing missing values (geom_path).
# Confirmed breast cancer event
a1 = endpoint$`Intervention Group`
b1 = endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`
table(a1, b1)## b1
## a1 Confirmed – Distant Recurrence\t Confirmed – Local Recurrence
## Comparison 189 28
## Intervention 168 35
## b1
## a1 Confirmed – New Primary Breast Cancer
## Comparison 35
## Intervention 43
## b1
## a1 Confirmed – Regional Recurrence No Evidence of Recurrence
## Comparison 10 1289
## Intervention 10 1281
# Confirmed deaths
b2 = endpoint$`Breast Cancer Contribute to Death`
table(a1, b2)## b2
## a1 Dead from a cause other than Breast Cancer
## Comparison 25
## Intervention 28
## b2
## a1 Dead from Breast Cancer
## Comparison 135
## Intervention 126
## b2
## a1 Dead from Cancer, not confirmed breast but likely so Not Dead
## Comparison 0 1391
## Intervention 1 1382